# load necessary packages
library(tidyverse)
[30m── [1mAttaching packages[22m ────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──[39m
[30m[32m✔[30m [34mggplot2[30m 3.2.1 [32m✔[30m [34mpurrr [30m 0.3.3
[32m✔[30m [34mtibble [30m 2.1.3 [32m✔[30m [34mdplyr [30m 0.8.3
[32m✔[30m [34mtidyr [30m 1.0.2 [32m✔[30m [34mstringr[30m 1.4.0
[32m✔[30m [34mreadr [30m 1.3.1 [32m✔[30m [34mforcats[30m 0.5.0[39m
[30m── [1mConflicts[22m ───────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[30m [34mdplyr[30m::[32mfilter()[30m masks [34mstats[30m::filter()
[31m✖[30m [34mdplyr[30m::[32mlag()[30m masks [34mstats[30m::lag()[39m
library(mosaic)
Loading required package: lattice
Loading required package: ggformula
Loading required package: ggstance
Attaching package: ‘ggstance’
The following objects are masked from ‘package:ggplot2’:
geom_errorbarh, GeomErrorbarh
New to ggformula? Try the tutorials:
learnr::run_tutorial("introduction", package = "ggformula")
learnr::run_tutorial("refining", package = "ggformula")
Loading required package: mosaicData
Loading required package: Matrix
Attaching package: ‘Matrix’
The following objects are masked from ‘package:tidyr’:
expand, pack, unpack
The 'mosaic' package masks several functions from core packages in order to add
additional features. The original behavior of these functions should not be affected by this.
Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
Attaching package: ‘mosaic’
The following object is masked from ‘package:Matrix’:
mean
The following objects are masked from ‘package:dplyr’:
count, do, tally
The following object is masked from ‘package:purrr’:
cross
The following object is masked from ‘package:ggplot2’:
stat
The following objects are masked from ‘package:stats’:
binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test, quantile, sd, t.test, var
The following objects are masked from ‘package:base’:
max, mean, min, prod, range, sample, sum
library(DataComputing)
library(ggplot2)
What factors are common among all MLB Hall of Fame players?
This document is required to indicate where various requirements can be found within your Final Project Report Rmd. You must indicate line numbers as they appear in your final Rmd document accompanying each of the following required tasks. Points will be deducted if line numbers are missing or differ significantly from the submitted Final Rmd document.
Description: (1) Analysis includes at least two different data sources. (2) Primary data source may NOT be loaded from an R package—though supporting data may. (3) Access to all data sources is contained within the analysis. (4) Imported data is inspected at beginning of analysis using one or more R functions: e.g., str, glimpse, head, tail, names, nrow, etc.
# Hall of Fame voting records (ballots, votes, inducted flag) from the local CSV.
HallOfFame <- readr::read_csv(file = "core/HallOfFame.csv")
Parsed with column specification:
cols(
playerID = [31mcol_character()[39m,
yearID = [32mcol_double()[39m,
votedBy = [31mcol_character()[39m,
ballots = [32mcol_double()[39m,
needed = [32mcol_double()[39m,
votes = [32mcol_double()[39m,
inducted = [31mcol_character()[39m,
category = [31mcol_character()[39m,
needed_note = [31mcol_character()[39m
)
# All-Star records keyed by playerID / yearID / gameID, from the local CSV.
AllstarFull <- readr::read_csv(file = "core/AllstarFull.csv")
Parsed with column specification:
cols(
playerID = [31mcol_character()[39m,
yearID = [32mcol_double()[39m,
gameNum = [32mcol_double()[39m,
gameID = [31mcol_character()[39m,
teamID = [31mcol_character()[39m,
lgID = [31mcol_character()[39m,
GP = [32mcol_double()[39m,
startingPos = [32mcol_double()[39m
)
# Salary per player, team, league and year, from the local CSV.
Salaries <- readr::read_csv(file = "core/Salaries.csv")
Parsed with column specification:
cols(
yearID = [32mcol_double()[39m,
teamID = [31mcol_character()[39m,
lgID = [31mcol_character()[39m,
playerID = [31mcol_character()[39m,
salary = [32mcol_double()[39m
)
# Batting statistics from the local CSV.
# Left to its own type guessing, readr typed IBB, HBP, SH and SF as logical,
# which caused 87,292 parsing failures: numeric values such as 2 or 5 could
# not be parsed and were replaced by NA. Declaring these four columns as
# numeric keeps their values; every other column is still guessed as before.
Batting <- read_csv(
  "core/Batting.csv",
  col_types = cols(
    IBB = col_double(),
    HBP = col_double(),
    SH = col_double(),
    SF = col_double()
  )
)
Parsed with column specification:
cols(
.default = col_double(),
playerID = [31mcol_character()[39m,
teamID = [31mcol_character()[39m,
lgID = [31mcol_character()[39m,
IBB = [33mcol_logical()[39m,
HBP = [33mcol_logical()[39m,
SH = [33mcol_logical()[39m,
SF = [33mcol_logical()[39m
)
See spec(...) for full column specifications.
87292 parsing failures.
row col expected actual file
1999 HBP 1/0/T/F/TRUE/FALSE 2 'core/Batting.csv'
2001 HBP 1/0/T/F/TRUE/FALSE 2 'core/Batting.csv'
2020 HBP 1/0/T/F/TRUE/FALSE 2 'core/Batting.csv'
2022 HBP 1/0/T/F/TRUE/FALSE 2 'core/Batting.csv'
2027 HBP 1/0/T/F/TRUE/FALSE 5 'core/Batting.csv'
.... ... .................. ...... ..................
See problems(...) for more details.
# Inspect the imported Hall of Fame data: first rows, then a per-column
# overview of names, types and leading values.
head(HallOfFame)
glimpse(HallOfFame)
Observations: 4,191
Variables: 9
$ playerID [3m[38;5;246m<chr>[39m[23m "cobbty01", "ruthba01", "wagneho01", "mathech01", "johnswa01", "lajoina01", "speaktr01", "youngcy01", "hornsro01", "coch…
$ yearID [3m[38;5;246m<dbl>[39m[23m 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, 1936, …
$ votedBy [3m[38;5;246m<chr>[39m[23m "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BBWAA", "BB…
$ ballots [3m[38;5;246m<dbl>[39m[23m 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 78, 78, 78, 226, 78, 78, 226, 226, …
$ needed [3m[38;5;246m<dbl>[39m[23m 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 59, 59, 59, 170, 59, 59, 170, 170, …
$ votes [3m[38;5;246m<dbl>[39m[23m 222, 215, 215, 205, 189, 146, 133, 111, 105, 80, 77, 60, 58, 55, 51, 47, 40, 39, 39, 33, 33, 32, 21, 21, 20, 17, 17, 16,…
$ inducted [3m[38;5;246m<chr>[39m[23m "Y", "Y", "Y", "Y", "Y", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", …
$ category [3m[38;5;246m<chr>[39m[23m "Player", "Player", "Player", "Player", "Player", "Player", "Player", "Player", "Player", "Player", "Player", "Player", …
$ needed_note [3m[38;5;246m<chr>[39m[23m NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
# Inspect the imported salary data: first rows, then a per-column overview
# of names, types and leading values.
head(Salaries)
glimpse(Salaries)
Observations: 26,428
Variables: 5
$ yearID [3m[38;5;246m<dbl>[39m[23m 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985, 198…
$ teamID [3m[38;5;246m<chr>[39m[23m "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL", "ATL…
$ lgID [3m[38;5;246m<chr>[39m[23m "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL…
$ playerID [3m[38;5;246m<chr>[39m[23m "barkele01", "bedrost01", "benedbr01", "campri01", "ceronri01", "chambch01", "dedmoje01", "forstte01", "garbege01", "harpet…
$ salary [3m[38;5;246m<dbl>[39m[23m 870000, 550000, 545000, 633333, 625000, 800000, 150000, 483333, 772000, 250000, 1500000, 455000, 407500, 275000, 775000, 16…
# Inspect the first rows of the imported batting data.
head(Batting)